library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.2
## ── Conflicts ───────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidytuesdayR)
## Warning: package 'tidytuesdayR' was built under R version 3.6.2
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(dplyr)
library(tidyr)
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Thank you Prof. Jo Hardin for walking through this.
# Download the TidyTuesday release dated 2020-12-22.
tuesdata <- tidytuesdayR::tt_load("2020-12-22")
## --- Compiling #TidyTuesday Information for 2020-12-22 ----
## --- There is 1 file available ---
## --- Starting Download ---
##
## Downloading file 1 of 1: `big-mac.csv`
## --- Download complete ---
# Alternative: tuesdata <- tidytuesdayR::tt_load(2020, week = 52)
# This week's TidyTuesday!
# tidytuesdayR::use_tidytemplate()
# Pull the Big Mac table out of the download; backticks quote the
# non-syntactic element name.
bigmac <- tuesdata$`big-mac`
# Keep a local CSV copy of the data.
write_csv(bigmac, "big-mac.csv")
# Number of observations recorded for each country.
# The first `name` is the grouping column; `name = "count"` labels the tally.
bigmac %>%
  count(name, name = "count")
## # A tibble: 57 x 2
## name count
## <chr> <int>
## 1 Argentina 33
## 2 Australia 33
## 3 Azerbaijan 5
## 4 Bahrain 5
## 5 Brazil 33
## 6 Britain 33
## 7 Canada 33
## 8 Chile 33
## 9 China 33
## 10 Colombia 28
## # … with 47 more rows
We notice that most of the countries have 33 observations, but some have as few as 5. What is the Euro area? Countries like Germany and Austria are missing, so it could potentially be a combination of European countries. Finally, it should be noted that both UAE and United Arab Emirates are included. What is the reasoning behind these labels?
# Keep only the five economies used in the focused comparisons.
bigmacW <- filter(
  bigmac,
  name %in% c("Australia", "Canada", "United States", "Britain", "Euro area")
)
# Count the observations for each selected economy.
# The first `name` is the grouping column; `name = "count"` labels the tally.
bigmacW %>%
  count(name, name = "count")
## # A tibble: 5 x 2
## name count
## <chr> <int>
## 1 Australia 33
## 2 Britain 33
## 3 Canada 33
## 4 Euro area 33
## 5 United States 33
Let’s take a look at Big Mac prices:
# Dollar price over time for every country, with one smoothed trend per
# country and no confidence band. The colour mapping is shared by both layers.
ggplot(bigmac, aes(x = date, y = dollar_price, color = name)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(title = "Big Mac Prices in USD")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 17709
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 376.65
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 1.3079e+05
Let’s have a bit more clarity by limiting which countries are shown:
# Dollar price over time for the selected economies only, with one smoothed
# trend per economy and no confidence band.
ggplot(bigmacW, aes(x = date, y = dollar_price, color = name)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(x = "purchase price date", title = "Big Mac prices in USD")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Local-currency price over time for the selected economies, with one smoothed
# trend per economy and no confidence band.
ggplot(bigmacW, aes(x = date, y = local_price, color = name)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(x = "purchase price date", title = "Big Mac Prices in local currency")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ECONOMICS
# US local price on each date; it is the denominator of the Big Mac
# exchange rate computed below.
bigmacUS <- bigmac %>%
  filter(name == "United States") %>%
  select(date, local_US_price = local_price)
# Attach the US price to every row by date, then derive:
#   bigmac_exchange - local price divided by the US price on the same date
#   value           - relative gap between bigmac_exchange and dollar_ex
bigmac <- bigmac %>%
  full_join(bigmacUS, by = "date") %>%
  mutate(
    bigmac_exchange = local_price / local_US_price,
    value = (bigmac_exchange - dollar_ex) / dollar_ex
  )
The value on the y-axis combines the Big Mac exchange rate with the currency exchange rate.
Let’s see if the Big Mac exchange rate and the currency exchange rate are reasonably correlated. Note: There are a few countries with enormous exchange rates. Unfortunately, they skew our ability to compare the Big Mac exchange rates and the USD exchange rate. Therefore, we will filter them out before making our descriptive plot.
# Compare the two exchange rates, keeping only rows whose dollar exchange
# rate is below 10 so the extreme currencies do not dominate the axes.
ggplot(
  filter(bigmac, dollar_ex < 10),
  aes(x = dollar_ex, y = bigmac_exchange)
) +
  geom_point()
# Big Mac index over time for all countries. Points are coloured by the sign
# of the index, and Britain is traced in yellow.
ggplot(bigmac, aes(x = date, y = value)) +
  # `alpha` was previously misspelled, so the transparency was ignored.
  geom_point(
    aes(color = ifelse(value <= 0, "negative", "positive")),
    alpha = 0.2
  ) +
  geom_hline(yintercept = 0) +
  # geom_line() has no `se` parameter; x and y are inherited from ggplot().
  geom_line(data = filter(bigmac, name == "Britain"), color = "yellow") +
  # Named values pin each colour to its label explicitly.
  scale_color_manual(values = c(negative = "red", positive = "blue")) +
  theme(legend.position = "none") +
  # The index is plotted on the y-axis; the x-axis is the date.
  labs(
    x = "date",
    y = "the Big Mac index",
    title = "Big Mac Index, the yellow line is vs Britain"
  )
Creating interactive lines with plotly
# Static version of the index plot. The `text` aesthetic is not used by
# ggplot2 itself (hence the warnings below); it carries the country label
# that ggplotly() shows as the hover tooltip.
test <- ggplot(bigmac, aes(x = date, y = value)) +
  # `alpha` was previously misspelled, so the transparency was ignored.
  geom_point(
    aes(
      color = ifelse(value <= 0, "negative", "positive"),
      text = sprintf("country: %s", name)
    ),
    alpha = 0.2
  ) +
  geom_hline(yintercept = 0) +
  # One grey line per country; geom_line() has no `se` parameter.
  geom_line(
    aes(group = name, text = sprintf("country: %s", name)),
    color = "grey"
  ) +
  # Britain is drawn last so the yellow line sits on top of the grey ones.
  geom_line(data = filter(bigmac, name == "Britain"), color = "yellow") +
  # Named values pin each colour to its label explicitly.
  scale_color_manual(values = c(negative = "red", positive = "blue")) +
  theme(legend.position = "none") +
  # The index is plotted on the y-axis; the x-axis is the date.
  # The title now matches the yellow Britain line.
  labs(
    x = "date",
    y = "the Big Mac index",
    title = "Big Mac Index, the yellow line is vs Britain"
  )
## Warning: Ignoring unknown aesthetics: text
## Warning: Ignoring unknown aesthetics: text
# Convert to an interactive plot whose tooltip shows only the `text` label.
ggplotly(test, tooltip = "text")
# Key the data by country so that highlighting acts on a whole country's
# traces, and group by country so each one gets its own line.
base <- plot_ly(highlight_key(bigmac, ~name)) %>%
  group_by(name)
# `base` is already grouped by name, so it is not grouped a second time.
test2 <- base %>%
  add_lines(x = ~date, y = ~value) %>%
  add_markers(x = ~date, y = ~value)
# Highlight on hover. The argument is `selectize`; the earlier `selective`
# is not a highlight() argument and only produced a warning.
highlight(
  test2,
  on = "plotly_hover",
  selectize = FALSE,
  dynamic = FALSE,
  color = "blue",
  persistent = FALSE
)
## Setting the `off` event (i.e., 'plotly_doubleclick') to match the `on` event (i.e., 'plotly_hover'). You can change this default via the `highlight()` function.